
****************************************************************
* Figure 1 & Analysis of Merged Data for All Post-1990 Patents *
****************************************************************

* Examining PatentsView and PatEx discrepancies
* Each case reloads the merged file, keeps one kind of disagreement among the
* patents found in both sources, and then prints the number of patents kept.
* -count- is needed because -keep if- only reports how many rows were deleted.

* Case 1: terminal disclaimer flagged in PatentsView but not in PatEx
use "term_disc_merged.dta", clear
keep if source == "both" & term_disc_pv == 1 & term_disc_patex == 0
count
* Only 9,692 patents in both sources have term disc in PatentsView but not PatEx
* - Spot check suggests these do have disclaimer on face of patent or certificate of correction, but not in PatentCenter transactions (or not properly coded)

* Case 2: terminal disclaimer flagged in PatEx but not in PatentsView
use "term_disc_merged.dta", clear
keep if source == "both" & term_disc_pv == 0 & term_disc_patex == 1
count
* 329,908 patents in both sources have term disc in PatEx but not PatentsView
* - Many of these do have term disc on face of patent, but maybe issue is that it's hyphenated across lines ("dis-claimer")?

* Analysis for all patents
use "term_disc_merged.dta", clear
tab term_disc
* 735,320 out of 6,755,041 (11%) have a terminal disclaimer

* Per-issue-year totals; every row of a given year carries the same three values.
gen issue_year = year(patent_date)
* -bysort- sorts on issue_year itself, so no separate -sort- is required.
* count() tallies rows with a nonmissing patent_id; total() sums the 0/1 result
* of (term_disc == 1), so rows with a missing term_disc add 0 to the numerator.
bysort issue_year: egen patents_per_year = count(patent_id)
bysort issue_year: egen term_disc_per_year = total(term_disc == 1)
gen percent_term_disc_per_year = 100 * term_disc_per_year / patents_per_year
save "term_disc_merged_analysis.dta", replace

* Keep one row per issue year for plotting. This happens after the save above,
* so the saved analysis file still contains every patent.
duplicates drop issue_year, force

* Graph: the x-axis range (1990 to 2023) and 10-year tick spacing are set here
* in code so the figure no longer needs a manual edit in the Graph Editor.
twoway (line percent_term_disc_per_year issue_year), title("All Utility Patents") ///
xtitle("Patent Issue Year") ytitle("Percentage with Terminal Disclaimers") ///
xscale(range(1990 2023)) xlabel(1990(10)2023)


************************************************
* Figure 2 & Analysis of Litigated Patent Data *
************************************************

* Litigated patents: overall terminal-disclaimer share, then the share by issue year
use "litigated_patents_term_disc_no_dups.dta", clear
tabulate term_disc
* 12,777 out of 48,412 (26%) have a terminal disclaimer

* Yearly denominator, numerator, and percentage (repeated on every row of the year).
* -bysort- performs the sort on issue_year before each -egen-.
generate issue_year = year(patent_date)
bysort issue_year: egen patents_per_year = count(patent_id)
bysort issue_year: egen term_disc_per_year = total(term_disc == 1)
generate percent_term_disc_per_year = 100 * term_disc_per_year / patents_per_year

* 2021 is left out of the figure: it holds only one patent (which has a term disc)
drop if issue_year == 2021

line percent_term_disc_per_year issue_year, ///
    title("Litigated Patents") ///
    xtitle("Patent Issue Year") ///
    ytitle("Percentage with Terminal Disclaimers")


*******************************************
* Figure 3 & Analysis of Orange Book Data *
*******************************************

* Orange Book patents (by NDA): overall terminal-disclaimer share, then the share by issue year
use "nda_patents_term_disc_no_dups.dta", clear
tabulate term_disc
* 4,441 out of 8,859 unique patents (50%) have a terminal disclaimer

* Yearly denominator, numerator, and percentage (repeated on every row of the year).
* -bysort- performs the sort on issue_year before each -egen-.
generate issue_year = year(patent_date)
bysort issue_year: egen patents_per_year = count(patent_id)
bysort issue_year: egen term_disc_per_year = total(term_disc == 1)
generate percent_term_disc_per_year = 100 * term_disc_per_year / patents_per_year

line percent_term_disc_per_year issue_year, ///
    title("Orange Book Patents") ///
    xtitle("Patent Issue Year") ///
    ytitle("Percentage with Terminal Disclaimers")


************************************
* Term Extensions for Each Dataset *
************************************

* Open each of the 3 main datasets, then...
* Drop patents filed before PTA statute took effect
* NOTE(review): Stata treats a missing value as larger than any number, so rows
* with a missing filing_date are NOT removed by this condition - confirm that
* keeping them is intended.
drop if filing_date < td(29may2000)
* Restrict to patents that have a terminal disclaimer
keep if term_disc == 1
* Flag patents with a recorded term extension. missing() is used instead of
* "!= ." so that extended missing values (.a-.z) are also treated as missing
* rather than being counted as an extension.
* NOTE(review): a recorded term_extension of 0 would still be flagged as 1 -
* confirm whether zeros occur in the data and how they should be treated.
gen has_extension = !missing(term_extension)
tab has_extension
summarize term_extension
